Wavelet Analysis of spectral data

This document presents the steps and results of a wavelet analysis from the raw data to the final plot presented in Thor et al.

Initial Curve Inspection

Below we present the raw curves of fluorescence intensity over time, split by cell-number, tech-rep, and genotype. These intensity curves are very noisy with an intensity decay as time progresses.

library(magrittr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
## 
##     set_names
library(WaveletComp)
## 
## Attaching package: 'WaveletComp'
## The following object is masked from 'package:ggplot2':
## 
##     arrow
library(MESS)


source("../src/load_data.R")
source("../src/plots.R")
d <- read_peaks("../data/raw_data.xlsx") %>% rename_kathrin()
d
## # A tibble: 174,202 x 9
##    minutes start_time line  cell_number tech_rep bio_rep reporter intensity
##      <dbl>      <dbl> <chr> <chr>       <chr>    <chr>   <chr>        <dbl>
##  1   0           9.83 WT    1           TechRep1 BioRep1 YFP           700.
##  2   0.333       9.83 WT    1           TechRep1 BioRep1 YFP           658.
##  3   0.667       9.83 WT    1           TechRep1 BioRep1 YFP           658.
##  4   1           9.83 WT    1           TechRep1 BioRep1 YFP           661.
##  5   1.33        9.83 WT    1           TechRep1 BioRep1 YFP           650.
##  6   1.67        9.83 WT    1           TechRep1 BioRep1 YFP           645.
##  7   2           9.83 WT    1           TechRep1 BioRep1 YFP           644.
##  8   2.33        9.83 WT    1           TechRep1 BioRep1 YFP           644.
##  9   2.67        9.83 WT    1           TechRep1 BioRep1 YFP           644.
## 10   3           9.83 WT    1           TechRep1 BioRep1 YFP           644.
## # … with 174,192 more rows, and 1 more variable: has_started <lgl>
d %>% all_data_plot()

YFP/CFP Ratio over time

The computed YFP/CFP ratio is the primary metric of interest. Data are presented up to 50 minutes, the data collection end point. The curves remain noisy but the initial peak followed by decay is observable.

ratio_added <- d %>% 
  filter(has_started == TRUE, minutes <= 50) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>% 
  ratio_curve(value = intensity )  

ratio_added %>% ratio_plot()

Wavelet Extraction of Signifcant Signal

In order to find the significant large signal structure within the noisy curve, we apply a wavelet reconstruction of the curves.

wavelet_reconstruction <- function(df, span = 100, re_scale = FALSE, val_col = "intensity"){
  bin <- capture.output(wave <- analyze.wavelet(df, val_col, loess.span = span, verbose = FALSE) )
  bin <- capture.output(r <- reconstruct(wave, plot.waves = FALSE, plot.rec = FALSE, rescale = re_scale) )
  
  tag <- paste0(val_col, ".r")
  return(data.frame(reconstructed_intensity = r$series[[tag]]) )
}



ratio_added %>% 
  filter(has_started == TRUE, minutes <= 50) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  nest() %>%
  mutate(reconstructed = map(data, wavelet_reconstruction, val_col = "ratio" ) ) %>%
  unnest() %>%
  ratio_recons_plot()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(data, reconstructed)`

Area Under Curve \(t \leq 5 \textrm{minutes}\)

Assuming that OSCA1.3 as a plasma membrane calcium channel activated by BIK1 is involved in the immediate calcium influx after flg22 perception. We therefore can look at the area under the wavelet curve (AUC) in the first five minutes.

get_auc <- function(df){
  
  steps <- 1:length(df$ratio)
  auc(steps, df$ratio, absolutearea = TRUE)
  
}

auc_data_short <- ratio_added %>% 
  filter(has_started == TRUE, minutes <= (start_time + 5) ) %>%
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  nest() %>%
  mutate(aucurve = map(data, get_auc)) %>%
  unnest( .preserve = c(aucurve)) %>% unnest() %>% 
  group_by(line, bio_rep, tech_rep, cell_number) %>%
  summarise(n_auc = n_distinct(aucurve), act_auc = first(aucurve) ) %>% 
  ungroup()
## Warning: The `.preserve` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## All list-columns are now preserved
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(data)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(aucurve)`
## `summarise()` regrouping output by 'line', 'bio_rep', 'tech_rep' (override with `.groups` argument)
auc_data_short
## # A tibble: 125 x 6
##    line        bio_rep tech_rep cell_number n_auc act_auc
##    <chr>       <chr>   <chr>    <chr>       <int>   <dbl>
##  1 osca1.3/1.7 BioRep1 TechRep1 1               1    86.5
##  2 osca1.3/1.7 BioRep1 TechRep1 2               1    90.7
##  3 osca1.3/1.7 BioRep1 TechRep1 3               1    85.9
##  4 osca1.3/1.7 BioRep1 TechRep1 4               1    75.0
##  5 osca1.3/1.7 BioRep1 TechRep1 5               1    91.1
##  6 osca1.3/1.7 BioRep1 TechRep1 6               1    98.5
##  7 osca1.3/1.7 BioRep1 TechRep2 1               1   113. 
##  8 osca1.3/1.7 BioRep1 TechRep2 2               1    97.3
##  9 osca1.3/1.7 BioRep1 TechRep2 3               1    92.4
## 10 osca1.3/1.7 BioRep1 TechRep2 4               1    86.8
## # … with 115 more rows
ggplot(auc_data_short) + aes(bio_rep, as.integer(act_auc) ) + geom_boxplot(aes(colour = line), notch = TRUE) + geom_jitter(aes(colour = line), position = position_dodge(width = 0.5)) 
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

Significance Testing of AUC \(t \leq 5 \textrm{minutes}\).

A linear model of the AUC responding to genotype followed by ANOVA is used to test the null hypothesis that the difference between the mean AUC between genotypes is equal to 0.

library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
model <- lme(act_auc ~ line, random =~ 1|bio_rep, data = auc_data_short)
summary(model)
## Linear mixed-effects model fit by REML
##  Data: auc_data_short 
##        AIC      BIC    logLik
##   909.5038 920.7526 -450.7519
## 
## Random effects:
##  Formula: ~1 | bio_rep
##         (Intercept) Residual
## StdDev:    4.568105 8.890754
## 
## Fixed effects: act_auc ~ line 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 86.53165  2.552796 120 33.89682  0.0000
## lineWT       4.94677  1.593509 120  3.10433  0.0024
##  Correlation: 
##        (Intr)
## lineWT -0.319
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -1.903292957 -0.608880677  0.005725672  0.602274223  2.813395713 
## 
## Number of Observations: 125
## Number of Groups: 4
anova(model)
##             numDF denDF   F-value p-value
## (Intercept)     1   120 1355.4500  <.0001
## line            1   120    9.6368  0.0024

With the model $ p < 0.05$, indicating that the difference between the mean AUCs of the genotypes is not likely to be 0. We therefore reject the null hypothesis.

Effect Size Between Genotypes

The size of the effect is computed by taking difference in means between AUC in the genotypes. We express as a percentage difference.

library(dplyr)
group_means <- auc_data_short %>% 
  ungroup() %>% 
  group_by(line) %>%
  summarise(mean = mean(act_auc)) 
## `summarise()` ungrouping output (override with `.groups` argument)
group_means
## # A tibble: 2 x 2
##   line         mean
##   <chr>       <dbl>
## 1 osca1.3/1.7  86.7
## 2 WT           91.6

Percent change = 5.4049494 .

Final Image

A rendering of the AUC data in a single, print-friendly panel.

library(ggplot2)
library(forcats)

auc_data_short$line <- factor(auc_data_short$line, levels = rev(levels(as.factor(auc_data_short$line))))
auc_data_short
## # A tibble: 125 x 6
##    line        bio_rep tech_rep cell_number n_auc act_auc
##    <fct>       <chr>   <chr>    <chr>       <int>   <dbl>
##  1 osca1.3/1.7 BioRep1 TechRep1 1               1    86.5
##  2 osca1.3/1.7 BioRep1 TechRep1 2               1    90.7
##  3 osca1.3/1.7 BioRep1 TechRep1 3               1    85.9
##  4 osca1.3/1.7 BioRep1 TechRep1 4               1    75.0
##  5 osca1.3/1.7 BioRep1 TechRep1 5               1    91.1
##  6 osca1.3/1.7 BioRep1 TechRep1 6               1    98.5
##  7 osca1.3/1.7 BioRep1 TechRep2 1               1   113. 
##  8 osca1.3/1.7 BioRep1 TechRep2 2               1    97.3
##  9 osca1.3/1.7 BioRep1 TechRep2 3               1    92.4
## 10 osca1.3/1.7 BioRep1 TechRep2 4               1    86.8
## # … with 115 more rows
xlabels <- c(
  expression(paste("WT")), 
  expression(paste(italic("osca1.3/1.7")))
  )


ggplot(auc_data_short) + aes(line, as.integer(act_auc) ) + geom_boxplot(aes(colour = line), notch = TRUE) + geom_jitter(aes(colour = line, shape = bio_rep), position = position_dodge(width = 0.5))  +
  scale_x_discrete(labels = xlabels ) +
  theme_bw() + 
  labs(x = NULL, y = "Area Under Curve") + 
  theme(legend.position = "none") + 
  scale_colour_manual(values= c("black", "black")) + 
  theme(axis.title = element_text(family = "Helvetica", size = 14)) + 
  theme(axis.text.x = element_text(family = "Helvetica", size = 12, colour = "black"))

ggsave("cameleon.svg", width = 55, height = 100, units = "mm")
ggsave("cameleon.png", width = 55, height = 100, units = "mm")

readr::write_csv(auc_data_short, "graph_data.csv")